Spatial interpolation

The contents will appear later

This section will contain hands-on material that will appear later.

import geopandas as gpd
import tobler
import pyinterpolate
import numpy as np

from libpysal import graph
from sklearn import neighbors
from scipy import interpolate

Area interpolation

simd = gpd.read_file(
    "https://martinfleischmann.net/sds/chapter_09/data/edinburgh_simd_2020.gpkg"
)
simd.head(2)
DataZone DZName LAName SAPE2017 WAPE2017 Rankv2 Quintilev2 Decilev2 Vigintilv2 Percentv2 ... CrimeRate CrimeRank HouseNumOC HouseNumNC HouseOCrat HouseNCrat HouseRank Shape_Leng Shape_Area geometry
0 S01008417 Balerno and Bonnington Village - 01 City of Edinburgh 708 397 5537 4 8 16 80 ... 86 5392.0 17 8 2% 1% 6350.0 20191.721420 1.029993e+07 POLYGON ((315157.369 666212.846, 315173.727 66...
1 S01008418 Balerno and Bonnington Village - 02 City of Edinburgh 691 378 6119 5 9 18 88 ... 103 5063.0 7 10 1% 1% 6650.0 25944.861787 2.357050e+07 POLYGON ((317816.000 666579.000, 318243.000 66...

2 rows × 52 columns

simd[["EmpRate", "geometry"]].explore("EmpRate", tiles="CartoDB Positron")
Make this Notebook Trusted to load map: File -> Trust Notebook
grid_8 = tobler.util.h3fy(simd, resolution=8)
grid_8.head(2)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
  proj = self._crs.to_proj4(version=version)
geometry
hex_id
8819727761fffff POLYGON ((315895.903 677993.995, 315431.809 67...
88197270d7fffff POLYGON ((313110.882 677234.253, 312646.604 67...
m = simd.boundary.explore(tiles="CartoDB Positron")
grid_8.boundary.explore(m=m, color="red")
Make this Notebook Trusted to load map: File -> Trust Notebook

overlay

interpolated = tobler.area_weighted.area_interpolate(
    simd,
    grid_8,
    extensive_variables=["EmpNumDep", "IncNumDep"],
    intensive_variables=["EmpRate", "IncRate"],
)
interpolated.explore("EmpRate", tiles="CartoDB Positron")
Make this Notebook Trusted to load map: File -> Trust Notebook

Point interpolation

airbnb = gpd.read_file(
    "https://martinfleischmann.net/sds/chapter_09/data/edinburgh_airbnb_2023.gpkg"
)
airbnb.head()
id bedrooms property_type price geometry
0 15420 1.0 Entire rental unit $126.00 POINT (325921.137 674478.931)
1 790170 2.0 Entire condo $269.00 POINT (325976.360 677655.252)
2 24288 2.0 Entire loft $95.00 POINT (326069.186 673072.913)
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683)
4 822829 3.0 Entire rental unit $361.00 POINT (325691.831 674328.127)
airbnb.price.head()
0    $126.00
1    $269.00
2     $95.00
3    $172.00
4    $361.00
Name: price, dtype: object
airbnb["price_float"] = airbnb.price.str.strip("$").str.replace(",", "").astype(float)
two_bed_homes = airbnb[
    (airbnb["bedrooms"] == 2)
    & (airbnb["property_type"] == "Entire rental unit")
    & (airbnb["price_float"] < 300)
].copy()
two_bed_homes.head()
id bedrooms property_type price geometry price_float
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683) 172.0
5 834777 2.0 Entire rental unit $264.00 POINT (324950.724 673875.598) 264.0
6 450745 2.0 Entire rental unit $177.00 POINT (326493.725 672853.904) 177.0
10 485856 2.0 Entire rental unit $157.00 POINT (326597.124 673869.551) 157.0
17 51505 2.0 Entire rental unit $155.00 POINT (325393.807 674177.409) 155.0
two_bed_homes.geometry.duplicated().any()
True
two_bed_homes = two_bed_homes.drop_duplicates("geometry")
two_bed_homes.explore("price_float", tiles="CartoDB Positron")
Make this Notebook Trusted to load map: File -> Trust Notebook
from libpysal import graph

knn = graph.Graph.build_kernel(two_bed_homes, k=10).transform("r")
two_bed_homes["price_lag"] = knn.lag(two_bed_homes.price_float)
two_bed_homes.explore("price_lag", tiles="CartoDB Positron")
Make this Notebook Trusted to load map: File -> Trust Notebook

Nearest

grid_10 = tobler.util.h3fy(simd, resolution=10)
len(grid_10)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
  proj = self._crs.to_proj4(version=version)
20162
grid_coordinates = grid_10.centroid.get_coordinates()
grid_coordinates.head()
x y
hex_id
8a19727516e7fff 314396.239638 668649.425957
8a1972775007fff 319532.248509 675768.790051
8a197272bae7fff 315166.141117 678976.818733
8a19727406b7fff 318552.784356 669260.243895
8a1972393707fff 327996.723476 670069.583024
coords = two_bed_homes.get_coordinates()
coords.head()
x y
3 326748.645636 674001.683211
5 324950.723888 673875.598033
6 326493.725178 672853.903917
10 326597.123684 673869.551295
17 325393.807222 674177.408961
nearest = interpolate.griddata(
    coords,
    two_bed_homes.price_lag,
    grid_coordinates,
    method="nearest",
)
nearest
array([ 34.        , 188.70398583, 106.14780144, ..., 188.70398583,
       188.70398583,  84.        ])
grid_10["nearest"] = nearest
_ = grid_10.plot('nearest', legend=True)

caption here
ax = grid_10.plot('nearest', legend=True)
two_bed_homes.plot(ax=ax, color="red", markersize=1)
<Axes: >

caption here

K-Nearest neighbours regression

Uniform

interpolation_uniform = neighbors.KNeighborsRegressor(n_neighbors=10, weights="uniform")
interpolation_uniform.fit(
    coords, two_bed_homes.price_lag
)
KNeighborsRegressor(n_neighbors=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
price_on_grid = interpolation_uniform.predict(grid_coordinates)
price_on_grid
array([ 93.47717891, 130.09448585, 125.97986269, ..., 120.36697169,
       120.36697169, 120.36697169])
grid_10["knn_uniform"] = price_on_grid
_ = grid_10.plot("knn_uniform", legend=True)

caption here

Distance-weighted

interpolation_distance = neighbors.KNeighborsRegressor(n_neighbors=10, weights="distance")
interpolation_distance.fit(
    coords, two_bed_homes.price_lag
)
KNeighborsRegressor(n_neighbors=10, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
grid_10["knn_distance"] = interpolation_distance.predict(grid_coordinates)
_ = grid_10.plot("knn_distance", legend=True)

caption here

Distance band regression

interpolation_radius = neighbors.RadiusNeighborsRegressor(radius=1000, weights="distance")
interpolation_radius.fit(
    coords, two_bed_homes.price_lag
)
RadiusNeighborsRegressor(radius=1000, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
grid_10["radius"] = interpolation_radius.predict(grid_coordinates)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/sklearn/neighbors/_regression.py:500: UserWarning: One or more samples have no neighbors within specified radius; predicting NaN.
  warnings.warn(empty_warning_msg)
_ = grid_10.plot("radius", legend=True, missing_kwds={'color': 'lightgrey'})

caption here

Ordinary Kriging

input_data = np.hstack([coords, two_bed_homes.price_lag.values.reshape(-1, 1)])
input_data
array([[3.26748646e+05, 6.74001683e+05, 2.07033397e+02],
       [3.24950724e+05, 6.73875598e+05, 1.54805935e+02],
       [3.26493725e+05, 6.72853904e+05, 1.43865293e+02],
       ...,
       [3.28513265e+05, 6.74048892e+05, 1.06875409e+02],
       [3.26840903e+05, 6.74767224e+05, 1.68848108e+02],
       [3.25415664e+05, 6.73345158e+05, 2.15847334e+02]])
step_radius = 100  # meters
max_range = 5000  # meters

exp_semivar = pyinterpolate.build_experimental_variogram(
    input_array=input_data,
    step_size=step_radius,
    max_range=max_range,
)
exp_semivar.plot()

caption here
semivar = pyinterpolate.build_theoretical_variogram(
    experimental_variogram=exp_semivar,
    model_name='linear',
    sill=exp_semivar.variance,
    rang=5000
)
semivar.plot()

caption here
ordinary_kriging = pyinterpolate.kriging(
    observations=input_data,
    theoretical_model=semivar,
    points=grid_coordinates.values,
    no_neighbors=10,
    how="ok",
    show_progress_bar=False,
)
grid_10["ordinary_kriging"] = ordinary_kriging[:, 0]
_ = grid_10.plot("ordinary_kriging", legend=True)

caption here
grid_10["variance_error"] = ordinary_kriging[:, 1]
_ = grid_10.plot("variance_error", legend=True)

caption here

Use larger distance

semivar_larger = pyinterpolate.build_theoretical_variogram(
    experimental_variogram=exp_semivar,
    model_name='linear',
    sill=exp_semivar.variance,
    rang=15000,
)
ordinary_kriging_l = pyinterpolate.kriging(
    observations=input_data,
    theoretical_model=semivar_larger,
    points=grid_coordinates.values,
    no_neighbors=10,
    how="ok",
    show_progress_bar=False,
)
grid_10["ok_larger"] = ordinary_kriging_l[:, 0]
_ = grid_10.plot("ok_larger", legend=True)

caption here
grid_10["variance_error_l"] = ordinary_kriging_l[:, 1]
_ = grid_10.plot("variance_error_l", legend=True)

caption here